import pandas as pd
import numpy as np
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.io as pio
# Make "plotly_mimetype" and "notebook" the default renderers for fig.show();
# Plotly accepts several renderer names joined with "+".
pio.renderers.default = "plotly_mimetype+notebook"
For this exercise, we have written the following code to load the stock dataset built into Plotly Express.
# Load the sample stock-price table bundled with Plotly Express.
stocks = px.data.stocks()
# Preview the first rows: a "date" column plus one column per ticker.
stocks.head()
| | date | GOOG | AAPL | AMZN | FB | NFLX | MSFT |
|---|---|---|---|---|---|---|---|
| 0 | 2018-01-01 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
| 1 | 2018-01-08 | 1.018172 | 1.011943 | 1.061881 | 0.959968 | 1.053526 | 1.015988 |
| 2 | 2018-01-15 | 1.032008 | 1.019771 | 1.053240 | 0.970243 | 1.049860 | 1.020524 |
| 3 | 2018-01-22 | 1.066783 | 0.980057 | 1.140676 | 1.016858 | 1.307681 | 1.066561 |
| 4 | 2018-01-29 | 1.008773 | 0.917143 | 1.163374 | 1.018357 | 1.273537 | 1.040708 |
Select a stock and create a suitable plot for it. Make sure the plot is readable and shows relevant information, such as dates and values.
# YOUR CODE HERE
# Line chart of the GOOG column over time.
# px.data.stocks() returns "date" as text by default; parse it so matplotlib
# draws a real time axis instead of treating every date as its own category.
x = pd.to_datetime(stocks["date"])
y = stocks["GOOG"]

fig, ax = plt.subplots(figsize=(16, 16))
ax.plot(x, y)
ax.set_title('Google stock')
ax.set_ylabel("stock value")
ax.set_xlabel("date")
# With datetime values matplotlib picks sensible date ticks on its own, so no
# manual thinning is needed; slant the labels to keep them from overlapping.
fig.autofmt_xdate()
plt.show()
You've already plotted data for one stock. It is also possible to plot several stocks together to support comparison.
To distinguish the lines, customise their line styles, markers, and colors, and add a legend to the plot.
# YOUR CODE HERE
# Overlay all six tickers on one set of axes for comparison.
# Parse the date strings so the x-axis is a real time axis rather than a
# categorical one.
x = pd.to_datetime(stocks["date"])

# One (line style, marker, colour) triple per ticker, so the series remain
# distinguishable by more than colour alone.
series_styles = {
    "GOOG": ("-", "o", "tab:blue"),
    "AAPL": ("--", "s", "tab:orange"),
    "AMZN": ("-.", "^", "tab:green"),
    "FB": (":", "D", "tab:red"),
    "NFLX": ("-", "v", "tab:purple"),
    "MSFT": ("--", "x", "tab:brown"),
}

fig, ax = plt.subplots(figsize=(16, 16))
for ticker, (line_style, marker, colour) in series_styles.items():
    ax.plot(
        x,
        stocks[ticker],
        label=ticker,
        linestyle=line_style,
        marker=marker,
        markersize=4,
        markevery=4,  # draw a marker on every 4th point to limit clutter
        color=colour,
    )
ax.legend(loc="best")
ax.set_title('Stocks')
ax.set_ylabel("stock value")
ax.set_xlabel("date")
# Let matplotlib choose the date ticks; slant the labels so they do not overlap.
fig.autofmt_xdate()
plt.show()
First, load the tips dataset
# Load the "tips" example dataset through seaborn.
tips = sns.load_dataset('tips')
# Preview the first rows to see which columns are available.
tips.head()
| | total_bill | tip | sex | smoker | day | time | size |
|---|---|---|---|---|---|---|---|
| 0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 |
| 1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 |
| 2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 |
| 3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 |
| 4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 |
Let's explore this dataset. Pose a question and create a plot that helps you answer it.
Some possible questions:
# YOUR CODE HERE
# Question: does the distribution of tip amounts differ by the "sex" column?
# One box per category on the x-axis, tip amount on the y-axis.
sns.boxplot(data=tips, x="sex", y="tip")
<AxesSubplot:xlabel='sex', ylabel='tip'>
# Annotated heatmap of the pairwise correlations in the tips data.
# Only the numeric columns are correlated: the frame also holds categorical
# columns (sex, smoker, day, time), and DataFrame.corr() in pandas >= 2.0
# raises on them instead of silently dropping them.
sns.heatmap(
    tips.select_dtypes(include="number").corr(),
    annot=True,
    cmap="coolwarm",
)
<AxesSubplot:>
Redo the above exercises (challenges 2 & 3) with plotly express. Create diagrams which you can interact with.
Hints:
# Show the first rows of the stock table again as a reminder of its columns.
stocks.head()
| | date | GOOG | AAPL | AMZN | FB | NFLX | MSFT |
|---|---|---|---|---|---|---|---|
| 0 | 2018-01-01 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
| 1 | 2018-01-08 | 1.018172 | 1.011943 | 1.061881 | 0.959968 | 1.053526 | 1.015988 |
| 2 | 2018-01-15 | 1.032008 | 1.019771 | 1.053240 | 0.970243 | 1.049860 | 1.020524 |
| 3 | 2018-01-22 | 1.066783 | 0.980057 | 1.140676 | 1.016858 | 1.307681 | 1.066561 |
| 4 | 2018-01-29 | 1.008773 | 0.917143 | 1.163374 | 1.018357 | 1.273537 | 1.040708 |
# YOUR CODE HERE
# Interactive line chart with one trace per ticker.
df_stocks = px.data.stocks()

# Every column except "date" is a ticker; deriving the list from the frame
# avoids repeating the column names by hand.
tickers = [column for column in df_stocks.columns if column != "date"]

fig = px.line(
    df_stocks,
    x="date",
    y=tickers,
    markers=True,
    title="Stocks",
    # For wide-form input Plotly Express names the melted columns "value" and
    # "variable"; relabel them for the axis title and the legend title.
    labels={"value": "stock value", "variable": "stock"},
)
fig.show()
# YOUR CODE HERE
# Tip versus total bill, coloured by sex and split into a grid of panels:
# one row per value of "time" and one column per value of "smoker".
tip = px.scatter(
    tips,
    x="total_bill",
    y="tip",
    color="sex",
    facet_col="smoker",
    facet_row="time",
)
tip.show()
Recreate the barplot below that shows the population of different continents for the year 2007.
Hints:
# Load the Gapminder sample dataset bundled with Plotly Express.
df = px.data.gapminder()
# Preview the first rows: one row per country and year.
df.head()
| | country | continent | year | lifeExp | pop | gdpPercap | iso_alpha | iso_num |
|---|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | Asia | 1952 | 28.801 | 8425333 | 779.445314 | AFG | 4 |
| 1 | Afghanistan | Asia | 1957 | 30.332 | 9240934 | 820.853030 | AFG | 4 |
| 2 | Afghanistan | Asia | 1962 | 31.997 | 10267083 | 853.100710 | AFG | 4 |
| 3 | Afghanistan | Asia | 1967 | 34.020 | 11537966 | 836.197138 | AFG | 4 |
| 4 | Afghanistan | Asia | 1972 | 36.088 | 13079460 | 739.981106 | AFG | 4 |
# YOUR CODE HERE
# Keep only the rows recorded for the year 2007, using a boolean mask.
df_2007 = df.loc[df["year"] == 2007]
# Display the filtered frame.
df_2007
| | country | continent | year | lifeExp | pop | gdpPercap | iso_alpha | iso_num |
|---|---|---|---|---|---|---|---|---|
| 11 | Afghanistan | Asia | 2007 | 43.828 | 31889923 | 974.580338 | AFG | 4 |
| 23 | Albania | Europe | 2007 | 76.423 | 3600523 | 5937.029526 | ALB | 8 |
| 35 | Algeria | Africa | 2007 | 72.301 | 33333216 | 6223.367465 | DZA | 12 |
| 47 | Angola | Africa | 2007 | 42.731 | 12420476 | 4797.231267 | AGO | 24 |
| 59 | Argentina | Americas | 2007 | 75.320 | 40301927 | 12779.379640 | ARG | 32 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1655 | Vietnam | Asia | 2007 | 74.249 | 85262356 | 2441.576404 | VNM | 704 |
| 1667 | West Bank and Gaza | Asia | 2007 | 73.422 | 4018332 | 3025.349798 | PSE | 275 |
| 1679 | Yemen, Rep. | Asia | 2007 | 62.698 | 22211743 | 2280.769906 | YEM | 887 |
| 1691 | Zambia | Africa | 2007 | 42.384 | 11746035 | 1271.211593 | ZMB | 894 |
| 1703 | Zimbabwe | Africa | 2007 | 43.487 | 12311143 | 469.709298 | ZWE | 716 |
142 rows × 8 columns
# Horizontal bar chart of total 2007 population per continent.
# df_2007 holds one row per country, so sum the populations by continent
# first; plotting the country rows directly stacks one labelled segment per
# country inside every bar.
continent_pop = df_2007.groupby("continent", as_index=False)["pop"].sum()

fig = px.bar(continent_pop, x="pop", y="continent", color="continent", text="pop")
fig.update_xaxes(autorange=True)
# Label each bar with its total, formatted to two significant digits with an
# SI prefix, and place the label just past the end of the bar.
fig.update_traces(texttemplate="%{text:.2s}", textposition="outside")
# uniformtext_minsize only takes effect once uniformtext_mode is set; "hide"
# drops labels that would have to shrink below the minimum size.
fig.update_layout(uniformtext_minsize=8, uniformtext_mode="hide")
# Render explicitly, as the other Plotly cells do.
fig.show()
import matplotlib.pyplot as plt
import numpy as np
# Evaluate y = 4 + 2*sin(2x) at 100 evenly spaced points on [0, 10].
x = np.linspace(0, 10, 100)
y = 4 + 2 * np.sin(2 * x)

# Draw the curve as a red line, then set the y-range, title and axis labels
# in a single call on the Axes.
fig, ax = plt.subplots()
ax.plot(x, y, "r", linewidth=2.0)
ax.set(
    ylim=(0, 10),
    title="visualise sin wave",
    xlabel="x",
    ylabel="y",
)
plt.show()